package com.daervin.svc.parser;

import com.daervin.svc.common.constants.Constants;
import com.daervin.svc.common.dto.NewsDTO;
import com.daervin.svc.common.dto.ThreeKRDTO;
import com.daervin.svc.common.dto.ThreeKRItemDTO;
import org.apache.log4j.Logger;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.selector.Json;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static com.daervin.svc.common.constants.SourceEnum.KR_36;

/**
 * Parser that converts a JSON list response (deserialized as {@link ThreeKRDTO})
 * into a list of {@link NewsDTO} entries tagged with the {@code KR_36} source's
 * announcer and category.
 *
 * @author daervin
 * @version 1.0.0
 * @since 1.0.0
 */
public class ThreeKRParser extends RootParser {

    private static final Logger LOGGER = Logger.getLogger(ThreeKRParser.class);

    /**
     * Creates a parser for the given URL.
     *
     * @param url the URL handed to the {@link RootParser} constructor
     */
    public ThreeKRParser(String url) {
        super(url);
    }

    /**
     * Reads the page's JSON body, maps each item to a {@link NewsDTO} and stores
     * the resulting list on the page under {@link Constants#PARSER_RESULT}.
     *
     * <p>Items whose title has already been seen within the same response are
     * skipped. When the response, its data section, or its item list is missing
     * or empty, the method returns without putting any field on the page.
     * A failure while handling a single item is logged and does not abort the
     * remaining items.
     *
     * @param page the crawled page whose JSON body is parsed and which receives the result
     */
    @Override
    public void listProcess(Page page) {
        Json responseJson = page.getJson();
        ThreeKRDTO response = responseJson.toObject(ThreeKRDTO.class);
        // Plain null check on the data object; a String utility is not appropriate here.
        if (response == null
                || response.getData() == null
                || CollectionUtils.isEmpty(response.getData().getItems())) {
            return;
        }

        List<NewsDTO> newsList = new ArrayList<>();
        Set<String> seenTitles = new HashSet<>();
        for (ThreeKRItemDTO item : response.getData().getItems()) {
            try {
                // Pre-filter: Set.add returns false when the title was already recorded.
                if (!seenTitles.add(item.getTitle())) {
                    continue;
                }
                newsList.add(toNews(item));
            } catch (Exception e) {
                // Best effort: one bad item (e.g. a null entry) must not drop the whole batch.
                LOGGER.error("ThreeKRParser error", e);
            }
        }
        page.putField(Constants.PARSER_RESULT, newsList);
    }

    /**
     * Copies the fields of a single response item into a new {@link NewsDTO}.
     *
     * @param item the response item to convert
     * @return the populated news entry; its links value is an empty string when the item has no URL
     */
    private NewsDTO toNews(ThreeKRItemDTO item) {
        NewsDTO dto = new NewsDTO();
        dto.setTitle(item.getTitle());
        dto.setDesc(item.getDescription());
        dto.setBelongDate(item.getPublishedAt());
        dto.setAnnouncer(KR_36.announcer);
        dto.setCategory(KR_36.category);
        dto.setLinks(item.getNewsUrl() == null ? "" : item.getNewsUrl());
        return dto;
    }
}
